op {
  graph_op_name: "ResourceApplyAdamWithAmsgrad"
  in_arg {
    name: "var"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "m"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "v"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "vhat"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "beta1_power"
    description: <<END
Must be a scalar.
END
  }
  in_arg {
    name: "beta2_power"
    description: <<END
Must be a scalar.
END
  }
  in_arg {
    name: "lr"
    description: <<END
Scaling factor. Must be a scalar.
END
  }
  in_arg {
    name: "beta1"
    description: <<END
Momentum factor. Must be a scalar.
END
  }
  in_arg {
    name: "beta2"
    description: <<END
Momentum factor. Must be a scalar.
END
  }
  in_arg {
    name: "epsilon"
    description: <<END
Ridge term. Must be a scalar.
END
  }
  in_arg {
    name: "grad"
    description: <<END
The gradient.
END
  }
  attr {
    name: "use_locking"
    description: <<END
If `True`, updating of the var, m, v, and vhat tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
END
  }
  summary: "Update \'*var\' according to the Adam algorithm."
  description: <<END
$$\text{lr}_t := \text{learning\_rate} * \sqrt{1 - \beta_2^t} / (1 - \beta_1^t)$$
$$m_t := \beta_1 * m_{t-1} + (1 - \beta_1) * g$$
$$v_t := \beta_2 * v_{t-1} + (1 - \beta_2) * g * g$$
$$\hat{v}_t := \max(\hat{v}_{t-1}, v_t)$$
$$\text{var} := \text{var} - \text{lr}_t * m_t / (\sqrt{\hat{v}_t} + \epsilon)$$
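
The same update, as a minimal NumPy sketch (illustrative only; the helper
name, the explicit step index `t`, and the in-place array updates are
assumptions made for the sketch, not part of this op's interface, where
`beta1_power` and `beta2_power` are passed in directly):

```python
import numpy as np

def adam_amsgrad_step(var, m, v, vhat, grad, lr, beta1, beta2, epsilon, t):
    # beta1_power and beta2_power correspond to beta1**t and beta2**t.
    lr_t = lr * np.sqrt(1.0 - beta2**t) / (1.0 - beta1**t)
    # Exponential moving averages of the gradient and its square.
    m[:] = beta1 * m + (1.0 - beta1) * grad
    v[:] = beta2 * v + (1.0 - beta2) * grad * grad
    # AMSGrad: keep the element-wise maximum of past second-moment estimates.
    vhat[:] = np.maximum(vhat, v)
    # Use vhat (not v) in the denominator of the parameter update.
    var[:] -= lr_t * m / (np.sqrt(vhat) + epsilon)
```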
END
}
